library("haven")
dd <- read_sas("C:\\Users\\Public\\Documents\\Sas_work_dir\\b20165.sas7bdat")
dd$wage <- dd$wage + runif(nrow(dd))/10000

dd$lnwage <- log(dd$wage)
dd$mlnwage <- mean(dd$lnwage)
dd$lnwage_et <- ave(dd$lnwage,dd$est,FUN=mean)
dd$lnwage_w_et <- dd$lnwage-dd$lnwage_et
dd$lnwage_et_ctr <- dd$lnwage_et-mean(dd$lnwage_et)
dd$est_nb <- ave(1-is.na(dd$lnwage),dd$est,FUN=sum)
dd$t10 <- (dd$lnwage>quantile(dd$lnwage,0.9))*1
dd$t10_iso <- ifelse(dd$t10>0,(ave(dd$t10,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)


v_all <- var(dd$lnwage)
v_with <- var(dd$lnwage_w_et)
v_bet <- var(dd$lnwage_et)

res <- data.frame(v_all,
                   v_with,
                   v_bet,
                   v_bet/v_all,
                   mean(dd$t10_iso,na.rm=T))
colnames(res) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res



#INCREASING VARIANCE BY 10%
bet_par <- 1^0.5
wit_par <- 1^0.5
tot_par <- 1.1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[2,]/res[1,]


#DEREASING TOTAL VARIANCE BY 10%
bet_par <- 1^0.5
wit_par <- 1^0.5
tot_par <- 0.9^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#INCREASING WITHIN VARIANCE BY 10%
bet_par <- 1^0.5
wit_par <- 1.1^0.5
tot_par <- 1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#DECREASING WITHIN VARIANCE BY 10%
bet_par <- 1^0.5
wit_par <- 0.9^0.5
tot_par <- 1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#INCREASING BETWEEN VARIANCE BY 10%
bet_par <- 1.1^0.5
wit_par <- 1^0.5
tot_par <- 1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#DECREASING BETWEEN VARIANCE BY 10%
bet_par <- 0.9^0.5
wit_par <- 1.0^0.5
tot_par <- 1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#DECREASING WITHIN INEQUALITY BY 10% AND BETWEEN INEQUALITY BY 5%
bet_par <- 0.95^0.5
wit_par <- 0.9^0.5
tot_par <- 1^0.5

dd$lnwage_p2 <- dd$mlnwage + (wit_par*dd$lnwage_w_et+bet_par*dd$lnwage_et_ctr)*tot_par
summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

#DECREASING WAGES by 1% IN ESTABLISHMENTS WITH NO TOP EARNERS
dd$est_notop <- (ave(dd$t10,dd$est,FUN=sum)==0)*1
bot_est_var <- 0.99
summary(dd$est_notop)

dd$lnwage_p2 <- ifelse(dd$est_notop==1,bot_est_var*dd$lnwage,dd$lnwage)

summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]


# INCREASING WAGES by 1% IN TOP 10% ESTABLISHMENTS 
# INCREASING WAGES by 1% IN BOTTOM 50% ESTABLISHMENTS

# res <- res[-nrow(res),]

bot_est_var <- 1.01
top_est_var <- 1.01

dd$top_est <- (dd$lnwage_et>quantile(dd$lnwage_et,0.90)*1)
dd$bot_est <- (dd$lnwage_et<quantile(dd$lnwage_et,0.5)*1)

dd$lnwage_p2 <- ifelse(dd$bot_est==1,bot_est_var*dd$lnwage,
                       ifelse(dd$top_est==1,top_est_var*dd$lnwage,
                       dd$lnwage))

summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]


# DECREASING WAGES by 1% IN TOP 10% ESTABLISHMENTS 
# DECREASING WAGES by 1% IN BOTTOM 50% ESTABLISHMENTS

# res <- res[-nrow(res),]

bot_est_var <- 0.99
top_est_var <- 0.99

dd$top_est <- (dd$lnwage_et>quantile(dd$lnwage_et,0.90)*1)
dd$bot_est <- (dd$lnwage_et<quantile(dd$lnwage_et,0.5)*1)

dd$lnwage_p2 <- ifelse(dd$bot_est==1,bot_est_var*dd$lnwage,
                       ifelse(dd$top_est==1,top_est_var*dd$lnwage,
                              dd$lnwage))

summary(dd$lnwage_p2)
summary(dd$lnwage)
dd$t10_p2 <- (dd$lnwage_p2>quantile(dd$lnwage_p2,0.9))*1
dd$t10_iso_p2 <- ifelse(dd$t10_p2>0,(ave(dd$t10_p2,dd$est,FUN=sum)-1)/(dd$est_nb-1),NA)
v_all_p2 <- var(dd$lnwage_p2)
v_with_p2 <- var(dd$lnwage_p2-ave(dd$lnwage_p2,dd$est,FUN=mean))
v_bet_p2 <- var(ave(dd$lnwage_p2,dd$est,FUN=mean))

res1 <- data.frame(v_all_p2,
                   v_with_p2,
                   v_bet_p2,
                   v_bet_p2/v_all_p2,
                   mean(dd$t10_iso_p2,na.rm=T))
colnames(res1) <- c("v_all","v_with","v_bet","v_bet_share","t10_iso")
res1

res <- rbind(res,res1)
t(res)

res[nrow(res),]/res[1,]

res$n <- nrow(dd)

simulation <- c("basis 2016 estimation",
                "10% increase in total variance",
                "10% decrease in total variance",
                "10% increase in within variance",
                "10% decrease in within variance",
                "10% increase in between variance",
                "10% decrease in between variance",
                "10% increase in within variance and 5% decrease in between variance",
                "1% wage decrease in workplace with no top 10% earners",
                "1% wage increase in both bottom 50% workplace and top 10% workplaces",
                "1% wage decrease in both bottom 50% workplace and top 10% workplaces")

res$type <- simulation

res

write.csv(res,file="C:\\Users\\Public\\Documents\\Olivier\\Ineq_simul.csv")
